# =================== Load Packages ===================
remove(list=ls())
library(data.table)
library(ggplot2)
library(zoo)
library(stargazer)
library(sandwich)
library( lmtest )
library(readxl)
library(tseries)
library(gmm)
library(car)
library(foreign)
library("AER")  # contains the IV package
# === Set path ===
currentPath = dirname(rstudioapi::getActiveDocumentContext()$path)
setwd(currentPath) # set the path of this R file as the current path
source("package_function_and_path.R")

# ===================== set up the directory =====================
setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
currentPath = getwd()
FigurePath = paste0(currentPath,"/Figures/")
setwd(currentPath) # set the path of this R file as the current path

# Important: Define the appropriate GMM function for estimations.
gmm_customized <- function( g,x,t0=NULL,gradv=NULL, optfct="nlminb", lower=NULL, upper=NULL, type="twoStep", wmatrix="optimal", prewhite=1, bw=4, itermax=1000  ){
  return( gmm(  g,x,t0=t0, gradv=gradv, type=type, wmatrix = wmatrix, vcov="HAC", kernel="Bartlett",  bw=bw,  itermax = itermax , optfct=optfct, lower=lower, upper=upper, prewhite = prewhite ) )
}
# GMM multiple initial point search. 
gmm_customized_multi_init <- function( g, x ,t0_list=NULL,gradv=NULL, optfct="nlminb", lower=NULL, upper=NULL, type="twoStep", wmatrix="optimal", prewhite=0, bw=4  ){
  obj_vec = rep(Inf, length(t0_list))
  result_list = list()
  for( iter in 1:length(t0_list) ){
    t0 = t0_list[[iter]]
    result_list[[iter]] = try(
      gmm(  g,x,t0, gradv=gradv, type=type, wmatrix = wmatrix, vcov="HAC", kernel="Bartlett",  bw=bw,  itermax = 1000 , optfct=optfct, lower=lower, upper=upper, prewhite = prewhite )
    )
    if(is.numeric(result_list[[iter]])){
      obj_vec[iter] = as.numeric(result_list[[iter]]$objective)
    }
  }
  best_result = which.min(obj_vec)
  return(  result_list[[best_result]] )
}

# Calculate R2
R2_fun <- function( predictions, data ){
  residuals = data - predictions
  index = !is.na(residuals)
  R2 = 1 - sum(residuals[index]^2)/sum((data[index]-mean(data))^2)
  return( R2 )
}

R2s_fun <- function(regs){
  R2_vec = c()
  for(i in 1:length(regs)){
    reg = regs[[i]]
    predictions = predict(reg)
    residuals = residuals(reg)
    data = predictions + residuals
    R2 = 1 - sum(residuals^2)/sum(data^2)
    R2_vec = c(R2_vec, R2)
  }
  return( R2_vec )
}

p_values_fun <- function(regs){
  p_value_vec = rep(0, length(regs))
  for(iter in 1:length(p_value_vec)){
    p_value_vec[iter] = summary(regs[[iter]])$stest$test[2] 
  }
  return(p_value_vec)
}
# Plot custimized figures
plot_fun <- function(  FileName,  x=NULL,  yseries_input, margin = c(2, 2, 0.5, 0.5), width = 6, height = 4, PlotScale=1.2, Linewidth=2, LegendPosition="topleft", LegendNames=NA, xlab="", ylab="", ylim=NA, colors=NA, cex.lab=1, line="NA", lineposition=0 ){
  pdf( FileName, width, height)
  par(mar=margin, mfrow=c(1,1))
  MyPlot( x, yseries_input,  PlotScale, Linewidth, LegendPosition, LegendNames, xlab, ylab, ylim, colors, cex.lab )
  if( line=="h" ){
    abline( h=lineposition, col="grey", lwd=1, lty=2 )
  }else if(line=="v"){
    abline( v=lineposition, col="grey", lwd=1, lty=2 )
  }
  dev.off()
}
regs_print_fun <- function(regs){
  stargazer( regs, omit = c("f"), omit.stat=c("rsq","f","ser"), se=RobustSE(regs)  , df = FALSE, digits = 2 , column.sep.width="0pt", type="text"  )
}

# Winsorize
winsorize_down_fun <- function(x, q=0.99){
  return( pmin( x,  quantile(x,q,na.rm=TRUE) )  )
}

# *** Definitions for Difference Regressions 
GenDiff <- function( variable, Lag=1 ){
  index = 1:(length(variable)-abs(Lag))
  if(Lag>0){
    return(  c( variable[index+Lag] - variable[index],  rep(NA,Lag))  )
  }else{
    Lag = -Lag
    return(  c(rep(NA,Lag), variable[index+Lag] - variable[index])  )
  }
}
GenLogDiff <- function( variable, Lag=1 ){  # Diff negative means shifting forward.
  return(GenDiff(log(variable),Lag))
}
cragg_donald_stats <- function(endogenous,instruments){  # Generate the Cragg Donald stats for weak instrument test
  residuals <- matrix(0, ncol = ncol(endogenous), nrow = nrow(endogenous))
  for (i in 1:ncol(endogenous)) {
    DT = data.frame( instruments )
    mod.aux <- lm(endogenous[,i] ~ ., data=DT)
    residuals[,i] <- resid(mod.aux)
  }
  Pz <- instruments %*% solve(t(instruments) %*% instruments) %*% t(instruments)
  Sv <- cov(residuals)
  sqrt.Sv <- chol(Sv)
  inv.Sv <- solve(sqrt.Sv)
  G <- t(inv.Sv) %*% t(endogenous) %*% Pz %*% endogenous %*% inv.Sv/ncol(instruments)
  g <- min(eigen(G)$values)
  return(g)
}
CD_stats_from_ivreg <- function( ivreg ){  # Generate the CD stat from an ivreg object
  rank = ivreg$rank
  value_Matrix = ivreg$model
  endogenous = as.matrix(value_Matrix[ , 2:rank ])
  instruments = as.matrix(value_Matrix[ , (rank+1):ncol(value_Matrix) ])
  return( cragg_donald_stats(endogenous, instruments) )
}
CD_stats_vec_from_ivregs <- function( regs ){
  N = length(regs)
  CD_vec <- rep("", N)
  for(i in 1:N){
    if( exists( "contrasts",  regs[[i]]) ){  # IV reg
      CD_vec[i] = round(CD_stats_from_ivreg( regs[[i]] ),digits=2)
    }
  }
  return(CD_vec)
}




# define the time periods of WWII
WWII_periods = seq( 1942, 1951 )

# ==================================== Read Data and Define New Variables ==================================== 
TB = as.data.table(read_xlsx(  "data_collection_monthly.xlsx" ))
TB[ , YearMon:=as.Date(YearMon) ]
TB[ , Year := year(YearMon) ]
# Note: change the units into Billion dollars

# **************** Bank Leverage as Inverse of Capital Ratio **************** 
TB[ , HKM_Lvg:=1/intermediary_capital_ratio ]
TB[ , Lvg:= HKM_Lvg]
TB[ is.na(Lvg) ,    Lvg:= 1/CapitalRatio_Constructed]

# **************** Robustness: Different Definitions of the Liquidity Premium **************** 
# Two options: use the repo--treasury spread after 1991, or use the PCA of refcorp-treasury spread. 
TB[ , LiqPrem := RepoT ]
# TB[ , LiqPrem := RefT_spread ]

# ***************** Concatenation of VIX index ****************
TB[ , VIX:=VIX_constructed ]   # Before 1990, just use the projected VIX.   After 1990, use the raw VIX index.  

# **************** Construction of Deposits/GDP ratios **************** 
TB = TB[ order(YearMon) ]
# Note: deposits quantities coming from FRED flow of funds data (the measurements are similar to aggregations from CALL reports, but flow of funds data have longer periods)
TB[ , Core_Deposits:=checking+saving ]
TB[ , Checking_GDP:=checking/GDP_nominal ]
TB[ , Saving_GDP:=saving/GDP_nominal ]
TB[ , TimeDep_GDP:=time/GDP_nominal ]
TB[ , Checking_deposit_spread:= FFR-checking_rate]
TB[ , Saving_deposit_spread:= FFR-saving_rate]
TB[ , Year:=year(YearMon)  ]

# MZM money. 
TB[ , MZM_to_GDP := MZMNS/GDP_nominal ]
MyPlot(  TB$YearMon, TB[ , list( MZM_to_GDP,  Core_Deposits/GDP_nominal, (Core_Deposits+time)/GDP_nominal ) ], LegendNames = c("MZM/GDP", "Core Deposits/GDP", "Total Deposits/GDP") )


# **************** Constructin of the Winsorized and All-positive Liquidity Premium Series **************** 
# Note: LiqPrem is still the raw series without additional processing. 
lowest_positive_LiqPrem = min(TB[LiqPrem>0]$LiqPrem, na.rm=TRUE)
quantile(TB$LiqPrem, 0.1, na.rm=TRUE)
# Important: LiqPrem_Log is defined as the logarithm of liquidity premium for only positive liquidity premium. Otherwise just set to NA
TB[ LiqPrem>0 , LiqPrem_Log := log(LiqPrem) ]
TB[ LiqPrem <= 0 , LiqPrem_Log := NA ]
# Important: the LiqPrem_winsorized winsorizes using the lowest positive liquidity premium in the data. 
TB[ , LiqPrem_winsorized := LiqPrem ]
TB[ LiqPrem <= 0, LiqPrem_winsorized := lowest_positive_LiqPrem ]

# **************** Define Deposits Spread *************
TB[ , Deposits_Core := checking+saving ]  # Total deposits.  checking and saving quantities are coming from flow of funds
TB[ , Deposits_Total := checking+saving+time ]  # Total deposits.  checking and saving quantities are coming from flow of funds

TB[ , Checking_Saving_AvgRate := (checking_rate*checking+saving_rate*saving)/Deposits_Core ] # This is more informative
TB[ , Deposits_Spread_checking := FFR-checking_rate ]
TB[ , Deposits_Spread_saving := FFR-saving_rate ]
TB[ , Deposits_Spread_samlltime := FFR-smalltime_rate ]
TB[ , Deposits_Spread_checking_and_saving := (Deposits_Spread_checking*checking+Deposits_Spread_saving*saving)/Deposits_Core ]
TB[ , Deposits_Spread_adding_smalltime := (Deposits_Spread_checking*checking+Deposits_Spread_saving*saving+Deposits_Spread_samlltime*time)/Deposits_Total ]
TB[ , Deposits_GDP := Deposits_Total/GDP_nominal ]  
TB[ , DepositsCore_GDP := Deposits_Core/GDP_nominal ]  
TB[ , DepositsTotal_GDP := Deposits_Total/GDP_nominal ]  

TB[ , MZM_GDP := MZMNS/GDP_nominal ]
TB[ , DebtToDeposits :=  DebtToGDP/Deposits_GDP  ]

TB[ , Deposits_GDP_standardized := scale(Deposits_GDP) ]
TB[ , DebtToGDP_standardized := scale(DebtToGDP) ]
TB[ , FFR_standardized := scale(FFR) ]
TB[ , LiqPrem_standardized := scale(LiqPrem) ]
TB[ , VIX_standardized := scale(VIX) ]
TB[ , DebtToDeposits_standardized := scale( DebtToGDP/Deposits_GDP ) ]
TB[ , TbillToDeposits := TbillToGDP/Deposits_GDP ]

reg = lm( Deposits_Spread_adding_smalltime~0+FFR  , data=TB)
deposits_spread_sensitivity_to_FFR= reg$coefficients[1]
MyPlot( TB$YearMon, TB[ , list(reg$coefficients[1]*FFR, Deposits_Spread_checking_and_saving) ], LegendNames = c("FFR projection", "Deposits Spread (checking+saving)") )
TB[ , Deposits_Spread_projection := deposits_spread_sensitivity_to_FFR*FFR ]

# **************** Define Standardized Variables *************
TB[ , Deposits_GDP_standardized := scale(Deposits_GDP) ]
TB[ , DebtToGDP_standardized := scale(DebtToGDP) ]
TB[ , FFR_standardized := scale(FFR) ]
TB[ , LiqPrem_standardized := scale(LiqPrem) ]
TB[ , VIX_standardized := scale(VIX) ]
TB[ , DebtToDeposits_standardized := scale( DebtToGDP/Deposits_GDP ) ]


# ***** MMF Liquidity Provision **********
TB[ , MMF_to_GDP := MMF_total_size/GDP_nominal ]
TB[ , DebtToGDP_excluding_MMF := DebtToGDP - MMF_Tsy_holding/GDP_nominal ]
TB[ , LiqPrem_MMF := FFR - MMF_rate ]

MyPlot(TB$YearMon, TB[ , list( DebtToGDP, DebtToGDP_excluding_MMF ) ], LegendNames = c("Debt to GDP excluding bank holding and Fed holding", "Debt to GDP  excluding bank holding, MMF holding, and Fed holding") )

MyPlot( TB$YearMon, TB[ , list(FFR, MMF_rate) ], LegendNames = c("FFR" ,"MMF rate") )
MyPlot( TB$YearMon, TB[ , list(FFR, 10*(FFR-MMF_rate)) ], LegendNames = c("FFR" ,"FFR-MMF spread (*10)") )
abline(h=0)

reg = lm( FFR-MMF_rate ~ FFR , data=TB )
summary(reg)


# Different Versions of Deposits Spread
TB[ , Deposit_Spread := Deposits_Spread_projection ]
TB[ , Repo3M:=RepoT+Treasury3Mon]
TB[ , CD_Tsy_3M :=  CD3M_Rate -Treasury3Mon  ]
# New deifnitions
TB[ , KVJ_net_deposits:=KVJ_fin_sector_debt_to_GDP-DepositsCore_GDP  ]

TB[ , P2CP3M := AACP3M+P2_P1_spread ] 

MyPlot( TB$YearMon, TB$KVJ_fin_sector_debt_to_GDP )
MyPlot( TB$YearMon, TB$DepositsCore_GDP )

MyPlot( TB$YearMon, TB$KVJ_net_deposits )

TB[ , P2_CP_Deposits := Deposit_Spread - FFR + P2CP3M  ]
TB[ , P2_CP_Repo := P2CP3M - Repo3M  ]
TB[ , P2_CP_MMF := pmax(P2CP3M - MMF_rate,0) ]
TB[ , M2_GDP := M2/GDP_nominal ]

reg_projection = lm( P2_CP_Deposits ~ FFR, data=TB )
TB[ , CP_deposit_spread_projection:= reg_projection$coefficients[1] + FFR*reg_projection$coefficients[2] ]

# ========================== Add More Definitions ===========================
index = !is.element(TB$Year, WWII_periods)
TB[ , FFR_and_Log_DebtToGDP :=  FFR* log(DebtToGDP) ]
TB[ , FFR_and_Log_DebtToDeposits :=  FFR* log(DebtToDeposits) ]
TB[ , FFR_and_Log_TBillToGDP :=  FFR* log(TbillToGDP) ]
TB[ , VIX_and_Log_TBillToGDP :=  VIX* log(TbillToGDP) ]
TB[ , VIX_and_FFR :=  VIX* FFR ]

TB[ , raw_DebtToDeposits := DebtToGDP_raw/Deposits_GDP ]
TB[ , Debt_to_MZM := DebtToGDP_raw/MZM_to_GDP ]
TB[ , raw_Debt_to_MZM := DebtToGDP_raw/MZM_to_GDP ]

TB[ , Debt_to_KVJ_Deposits := DebtToGDP/KVJ_fin_sector_debt_to_GDP ]
TB[ , raw_Debt_to_KVJ_Deposits := DebtToGDP_raw/KVJ_fin_sector_debt_to_GDP ]

TB[ , VIX_and_Log_DebtToGDP_and_FFR :=  VIX* log(DebtToGDP) * FFR ]
TB[ , VIX_and_Log_raw_DebtToGDP_and_FFR :=  VIX* log(DebtToGDP_raw) * FFR ]
TB[ , VIX_and_Log_DebtToDeposits_and_FFR :=  VIX* log(DebtToDeposits) * FFR ]
TB[ , VIX_and_Log_raw_DebtToDeposits_and_FFR :=  VIX* log(raw_DebtToDeposits) * FFR ]
TB[ , VIX_and_Log_DebtToKVJ_deposits_and_FFR :=  VIX* log(Debt_to_KVJ_Deposits) * FFR ]
TB[ , VIX_and_Log_raw_DebtToKVJ_deposits_and_FFR :=  VIX* log(raw_Debt_to_KVJ_Deposits) * FFR ]

TB[, Deposits_Spread_concatenated := FFR-Deposits_Rate_Historical]
TB[ !is.na(Deposits_Spread_checking_and_saving), Deposits_Spread_concatenated := Deposits_Spread_checking_and_saving]
TB[, Deposits_Spread_concatenated_all_deposits := Deposits_Spread_concatenated]
TB[ !is.na(Deposits_Spread_adding_smalltime), Deposits_Spread_concatenated_all_deposits := Deposits_Spread_adding_smalltime]

# Summarize the post-crisis periods, 2009-2021
summary(TB[Year>2009 & Year<=2020,list(FFR, VIX, DebtToDeposits, Deposits_Spread_adding_smalltime, RepoT)])

# Find the time horizon for the main regressions
max(TB[!is.na(RepoT)]$YearMon)
TB = TB[ !is.na(RepoT) ]   ### Note: For our results, we should leave this


# ====================== Define Quarter End ======================
Quarter_End_Months = as.yearmon(TB[,list(last(YearMon)),by=as.yearqtr(YearMon)]$V1)
TB[ , Is_Quarter_End := is.element(as.yearmon(YearMon), Quarter_End_Months) ]
TB_full = TB
